#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 29 14:32:57 2019

@author: hyungmokson
"""

import csv
import numpy as np
from os.path import isfile, join

## Header index:
## 0: Measurement time
## 1: Variable
## 2: STIRAP freq 1 (THz)
## 3: STIRAP freq 2 (THz)
## 4: STIRAP freq 2 (THz)
filename = 'thermalization_with Na.csv'
with open(filename, 'r') as csvfile:
    csvreader = csv.reader(csvfile)
    header = next(csvreader)

    ncols = len(csvfile.readline().split(','))

#print("header = " + str(header))

list_cols = list(range(1, ncols))
list_cols.remove(2)
list_cols.remove(3)
list_cols.remove(4) ## remove STIRAP frequency entries
raw = np.loadtxt(filename, delimiter = ',', skiprows=1, usecols = tuple(list_cols))

data_idx_start = 1
data = {}
for row in raw:
    key = row[0]
    if data.get(key, -1) == -1:
        data[key] = [row[data_idx_start:]]
    else:
        data[key].append(row[data_idx_start:])

keys = list(data.keys())
arg_list = np.argsort(keys)

data_array = [] 
i = 0
for key, value in data.items():
    n = len(data[key])
    print("for " + str(key) + ":  " + str(n))
    temp = np.array(value)
    
    avg_array = np.average(temp, axis = 0)
    std_array = np.std(temp, axis = 0)
    
    ## last two is always fit error for temperature measurement
    stand_err_array = std_array[:-2]/np.sqrt(n-1) 

    ## For each independent sampling given the s.t.d of the distribution, it also has
    ## uncertainty of the sampling itself, which is represented by the fit error.
    ## Those two should be added in quarature.
    ## The s.t.d of the distribution is common for every sampling (or measurement).
    ## --> the contribution of this is calcualted by stand_err_array, which is basically the standard error of the mean
    ## However, each measurement has different uncertainty of the sampling (i.e. fit error)
    ## --> the contribution of this is calculated like beliow:
    stand_err_rms_array = np.sqrt(np.sum(temp**2, axis=0))/n
    
    ## for number of particles, fit error is too small to be considered. 
    ## only statistical error is considered.
    nanum_err = stand_err_array[0]
    molnum_err = stand_err_array[1]
    
    ## only for temperature, estimate the total error by considering 
    ## the measurement error (i.e. fit error)
    ## for molecule temperature, the total error is the quadrature sum of
    ## statistical factor and the uncertainty of the measurement (i.e. fit error)
    moltemp_err = np.sqrt(stand_err_array[-2]**2 + stand_err_rms_array[-2]**2)
    ## for na temperature
    natemp_err = np.sqrt(stand_err_array[-1]**2 + stand_err_rms_array[-1]**2)

    avg_data_array = avg_array[:4]
    err_array = [nanum_err, molnum_err, moltemp_err, natemp_err]
    
    ## array for each "Variable" with avgerage data and total error
#    final_array = []
#    q = np.insert(avg_data_array, 0, key)
    final_array = np.append(avg_data_array, err_array)

    ## append into the final datay array
    data_array.append(final_array)        
    i += 1

output = []
## sort by the variables in ascending order
for idx in arg_list:
    temp = np.insert(data_array[idx], 0, keys[idx])
    output.append(temp)
    
print(output)

idx_nanum = 1
with open(join('csv','na num.csv'), 'w') as file:
    writer = csv.writer(file, delimiter = ',')
    for row in output:
        writer.writerow([row[0], row[idx_nanum], row[idx_nanum+4]])

idx_molnum = 2
with open(join('csv', 'mol num.csv'), 'w') as file:
    writer = csv.writer(file, delimiter = ',')
    for row in output:
        writer.writerow([row[0], row[idx_molnum], row[idx_molnum+4]])

idx_moltemp = 3
with open(join('csv','mol temp.csv'), 'w') as file:
    writer = csv.writer(file, delimiter = ',')
    for row in output:
        writer.writerow([row[0], row[idx_moltemp], row[idx_moltemp+4]])

idx_natemp = 4
with open(join('csv','na temp.csv'), 'w') as file:
    writer = csv.writer(file, delimiter = ',')
    for row in output:
        writer.writerow([row[0], row[idx_natemp], row[idx_natemp+4]])

        